rm(list=ls(all=TRUE))
graphics.off()

library(rdd)
library(xtable)
library(dplyr)

setwd() # set working directory
load("data/data_alt_boot.rdata")

# limit to parties with at least one elected 
data_alt_elected <-
  data_alt %>%
  group_by(year, muncpr, candpart) %>%
  summarise(represented = sum(elected)>0)

data_alt <- 
  left_join(data_alt, data_alt_elected) %>%
  filter(represented == 1)

#count all candidates in both elections
N_all <- nrow(data_alt)

#drop all from closed and split lists 

data_alt <- 
  data_alt %>%
  filter(openlist == 1 & splitlist == 0)

#Count remaining candidates
N_openlist <- nrow(data_alt)

# Create clusters of year, municipality, and party

clusters           <- data.frame(unique(cbind(data_alt$year,data_alt$muncpr,data_alt$candpart)))
colnames(clusters) <- c("year", "muncpr", "candpart")
clusters$cluster   <- 1:nrow(clusters)
data_alt$year        <- as.factor(data_alt$year)
data_alt$muncpr      <- as.factor(data_alt$muncpr)
data_alt <- left_join(data_alt, clusters, by = c("year", "muncpr", "candpart"))

#drop if the marginal loser never wins in sim or the marginal winners always win
data_alt <- 
  data_alt %>%
  filter(marg.los != 0 & marg.win != 1)

#Count remaining candidates
N_margrace <- nrow(data_alt)

#drop candidates that always or never win

data_alt <- 
  data_alt %>%
  filter(simscore > 0 & simscore < 1)

#Count remaining candidates
N_marginal_candidates <- nrow(data_alt)

#if cases are extremely close they might have wrong signed simscore. Recode these to arbitrarily small simscore
data_alt$margsim[data_alt$margsim <= 0 & data_alt$elected == 1] <-  0.0001
data_alt$margsim[data_alt$margsim >= 0 & data_alt$elected == 0] <- -0.0001

#create variables for belonging to main left wing parties (including Social Liberals)
#and personal votes as proportion of list votes and municipality total votes
data_alt$mainleft      <- data_alt$candpart %in% c("A", "F", "B", "Oe")
data_alt$share_of_party <- data_alt$candvote/data_alt$ptotvote
data_alt$share_of_mun  <- data_alt$candvote/data_alt$mtotvavo

##Display distribution and test for validity with McCrary
quartz(width = 12, height = 6)
par(mfrow=c(1,2), 
    mar=c(5,4,3,1),
    cex = 1.1)

#plot density as histogram
hist(data_alt$margsim, 
     main = "Distribution of forcing variable",
     xlab = "Score relative to threshold",
     breaks = 50,
     xlim = c(-1,1))
box(lwd = 1.5)
abline(v=0, lty = 2, col="grey50")

#plot density with McCrary test 
McCrary <- DCdensity(data_alt$margsim)

box(lwd = 1.5)
title(main = "McCrary's density test \n for forcing variable",
      ylab = "Density",
      sub = paste("N =",length(data_alt$margsim)),
      xlab = "Score relative to threshold")
abline(v=0, lty = 2, col="grey50")
legend("topright", 
       paste("p(McCrary) =", 
       round(1000*McCrary)/1000),
       box.lwd = 0, 
       box.col = "white", 
       bg = "white")
box(lwd = 1.5)

##Estimate and plot RD 
#find Imbens-Kalyanaraman bandwidth
IK.bw  <- IKbandwidth(data_alt$margsim, data_alt$electedt1)
IK.bw2 <- IKbandwidth(data_alt$margsim, data_alt$rerun)

##rerunning and elected 
rdest    <- RDestimate(electedt1 ~ margsim, data = data_alt)
rdest.cl <- RDestimate(electedt1 ~ margsim, data = data_alt, cluster = data_alt$cluster)

graphics.off()
quartz(w=8, h =4)
par(mfrow=c(1,2), 
    mar=c(4,4,2,1),
    cex = 1.1)

plot(rdest, range = c(-.8,.8), ylim = c(0, 0.7))
title( ylab = "p(rerunning and elected)", xlab = "Votes relative to threshold")
#add rugplot to illustrate point density over running var
rugz1 <- quantile(data_alt$margsim, probs = 0:100/100, na.rm = TRUE)
rug(rugz1)
abline(v=0, lty = 2, col = "grey50")

#plot for rerunning
rdest1    <- RDestimate(rerun ~ margsim, data = data_alt)
rdest1.cl <- RDestimate(rerun ~ margsim, data = data_alt, cluster = data_alt$cluster)
plot(rdest1, range = c(-.8,.8), ylim = c(0, 0.7))
title( ylab = "p(rerunning)", xlab = "Votes relative to threshold")
rug(rugz1)
abline(v=0, lty = 2, col = "grey50")

##########################################
##########################################
####### descriptive statistics ###########
##########################################
##########################################

weight <- 1 - abs(data_alt$margsim)/IK.bw
weight <- weight[weight > 0]

desc_data <- subset(data_alt, 
                    abs(data_alt$margsim)<IK.bw, 
                    select = c(margsim,
                               electedt1,
                               rerun,
                               mtotvavo,
                               ptotvote,
                               candlino,
                               mainleft,
                               candvote,
                               share_of_party,
                               share_of_mun))
labels <- colnames(desc_data)

weighted_means  <- matrix(NA, nrow = ncol(desc_data), ncol = 3)
for (i in 2:ncol(desc_data)){
  weighted_means[i-1,1] <- round(weighted.mean(desc_data[,labels[i]], w = weight),4)
  weighted_means[i-1,2] <- round(weighted.mean(desc_data[desc_data$margsim > 0,labels[i]], 
                                             w = weight[desc_data$margsim > 0]),4)
  weighted_means[i-1,3] <- round(weighted.mean(desc_data[desc_data$margsim < 0,labels[i]], 
                                             w = weight[desc_data$margsim < 0]),4)
}

weighted_means[10,] <- c(nrow(desc_data),
                          nrow(subset(desc_data, margsim > 0)),
                          nrow(subset(desc_data, margsim < 0)))            
colnames(weighted_means) <- c("All within bandwidth", "Winners", "Losers")
rownames(weighted_means) <- c("Elected at t1",
                              "Running at t1",
                              "Votes in municipality t0",
                              "Votes for party t0",
                              "Number on list t0",
                              "Left wing party t0",
                              "Personal votes t0",
                              "Personal share of party votes t0",
                              "Personal share of municipality votes t0",
                              "N")


xtable(weighted_means, 
       digits = rbind(matrix(3, nrow = 2, ncol = 4),
                      matrix(0, nrow = 2, ncol = 4),
                      matrix(3, nrow = 3, ncol = 4),
                      matrix(4, nrow = 2, ncol = 4),
                      rep(0, 4))       )

##Summarize jumps in table

out1 <- cbind(rdest$est,
              rdest$se,
              rdest.cl$se,
              rdest$bw,
              rdest$obs)
colnames(out1) <- c("Estimate", "Std. Error", "Rob. Std. Error", "Bandwidth", "Observations")

out2 <- cbind(rdest1$est,
              rdest1$se,
              rdest1.cl$se,
              rdest1$bw,
              rdest1$obs)
colnames(out2) <- c("Estimate", "Std. Error", "Rob. Std. Error", "Bandwidth", "Observations")

digits_tab <- rbind(matrix(3, nrow = 4, ncol = 7), rep(0,7))

xtable(cbind(t(out1),t(out2)), digits = digits_tab)

##Find CIs

CI1 <- c(rdest$est[1]  + qnorm(0.025)*rdest$se[1],
         rdest$est[1]  + qnorm(0.975)*rdest$se[1])
CI2 <- c(rdest1$est[1] + qnorm(0.025)*rdest1$se[1],
         rdest1$est[1] + qnorm(0.975)*rdest1$se[1])

# Create table with N and attrition

Ns <- matrix(c(N_all, 
               N_openlist, 
               N_margrace,
               N_marginal_candidates,
               rdest$obs[1], 
               rdest1$obs[1]),
             ncol = 1)

colnames(Ns) <- "N"
rownames(Ns) <- c("All candidates running for represented parties",
                  "Removing closed and splitlist",
                  "Keeping only marginal races",
                  "Keeping only marginal candidates",
                  "Candidates within IK-bandwidth for rerunning and reelection",
                  "Candidates within IK-bandwidth for rerunning")

xtable(Ns, digits = 0)
